From 40873a6637aba06cfdda586257e5080456ad90bb Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Mon, 25 Apr 2005 16:53:12 +0000 Subject: [PATCH] bitkeeper revision 1.1383 (426d2078vvMiOXQcJ98uhha5uIt0Sw) Enormously better process destruction performance. By not running on pagetables when we are destroying them we massively reduce TLB flushes and increase writable p.t. batches. This patch would probably improve even native Linux performance, because of the rabid ZAP_BLOCK_SIZE changes to unmap_vmas() that were introduced to 2.6.11. :-) Signed-off-by: Keir Fraser --- .../arch/xen/i386/kernel/smp.c | 6 +++--- .../arch/xen/i386/mm/hypervisor.c | 12 ++++++++---- .../arch/xen/x86_64/mm/hypervisor.c | 12 ++++++++---- .../include/asm-xen/asm-i386/mmu_context.h | 19 ++++++++++++++----- .../include/asm-xen/asm-i386/tlbflush.h | 9 ++++++--- .../include/asm-xen/asm-x86_64/tlbflush.h | 9 ++++++--- .../include/asm-xen/hypervisor.h | 4 ++-- 7 files changed, 47 insertions(+), 24 deletions(-) diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c index bcd398354e..915034b36d 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c @@ -448,11 +448,11 @@ irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, struct pt_regs *regs) { return 0; } void flush_tlb_current_task(void) -{ xen_tlb_flush_mask(current->mm->cpu_vm_mask); } +{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); } void flush_tlb_mm(struct mm_struct * mm) -{ xen_tlb_flush_mask(mm->cpu_vm_mask); } +{ xen_tlb_flush_mask(&mm->cpu_vm_mask); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ xen_invlpg_mask(vma->vm_mm->cpu_vm_mask, va); } +{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); } void flush_tlb_all(void) { xen_tlb_flush_all(); } diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c index 
0d212b295a..0c7738276b 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -106,11 +106,13 @@ void xen_tlb_flush_all(void) BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -void xen_tlb_flush_mask(cpumask_t mask) +void xen_tlb_flush_mask(cpumask_t *mask) { struct mmuext_op op; + if ( cpus_empty(*mask) ) + return; op.cmd = MMUEXT_TLB_FLUSH_MULTI; - op.cpuset = mask.bits; + op.cpuset = mask->bits; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } @@ -122,11 +124,13 @@ void xen_invlpg_all(unsigned long ptr) BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -void xen_invlpg_mask(cpumask_t mask, unsigned long ptr) +void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr) { struct mmuext_op op; + if ( cpus_empty(*mask) ) + return; op.cmd = MMUEXT_INVLPG_MULTI; - op.cpuset = mask.bits; + op.cpuset = mask->bits; op.linear_addr = ptr & PAGE_MASK; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c index f7ab076cb8..ad04d3f1f2 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c @@ -116,11 +116,13 @@ void xen_tlb_flush_all(void) BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -void xen_tlb_flush_mask(cpumask_t mask) +void xen_tlb_flush_mask(cpumask_t *mask) { struct mmuext_op op; + if ( cpus_empty(*mask) ) + return; op.cmd = MMUEXT_TLB_FLUSH_MULTI; - op.cpuset = mask.bits[0]; + op.cpuset = mask->bits; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } @@ -132,11 +134,13 @@ void xen_invlpg_all(unsigned long ptr) BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -void xen_invlpg_mask(cpumask_t mask, unsigned long ptr) +void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr) { struct mmuext_op op; + if ( 
cpus_empty(*mask) ) + return; op.cmd = MMUEXT_INVLPG_MULTI; - op.cpuset = mask.bits[0]; + op.cpuset = mask->bits; op.linear_addr = ptr & PAGE_MASK; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h index 2150a8dcf9..f6f8b04722 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h @@ -16,7 +16,7 @@ void destroy_context(struct mm_struct *mm); static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { -#if 0 /* XEN */ +#if 0 /* XEN: no lazy tlb */ unsigned cpu = smp_processor_id(); if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; @@ -51,7 +51,7 @@ static inline void switch_mm(struct mm_struct *prev, if (likely(prev != next)) { /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); -#if 0 /* XEN */ +#if 0 /* XEN: no lazy tlb */ per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; per_cpu(cpu_tlbstate, cpu).active_mm = next; #endif @@ -76,7 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); } -#if 0 /* XEN */ +#if 0 /* XEN: no lazy tlb */ else { per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); @@ -92,8 +92,17 @@ static inline void switch_mm(struct mm_struct *prev, #endif } -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) +/* + * XEN: We aggressively remove defunct pgd from cr3. We execute unmap_vmas() + * *much* faster this way, as no tlb flushes means much bigger wrpt batches. 
+ */ +#define deactivate_mm(tsk, mm) do { \ + asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ + if ((mm) && cpu_isset(smp_processor_id(), (mm)->cpu_vm_mask)) { \ + cpu_clear(smp_processor_id(), (mm)->cpu_vm_mask); \ + load_cr3(swapper_pg_dir); \ + } \ +} while (0) #define activate_mm(prev, next) do { \ switch_mm((prev),(next),NULL); \ diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h index 4d13a650a2..ed0d735737 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h @@ -40,21 +40,24 @@ extern unsigned long pgkern_mask; static inline void flush_tlb_mm(struct mm_struct *mm) { - if (mm == current->active_mm) + /* XEN: cpu_vm_mask is more accurate than active_mm. */ + if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) __flush_tlb(); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - if (vma->vm_mm == current->active_mm) + /* XEN: cpu_vm_mask is more accurate than active_mm. */ + if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) __flush_tlb_one(addr); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (vma->vm_mm == current->active_mm) + /* XEN: cpu_vm_mask is more accurate than active_mm. */ + if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) __flush_tlb(); } diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h index 35fd9b530d..3aa6115a61 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h @@ -44,21 +44,24 @@ extern unsigned long pgkern_mask; static inline void flush_tlb_mm(struct mm_struct *mm) { - if (mm == current->active_mm) + /* XEN: cpu_vm_mask is more accurate than active_mm. 
*/ + if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) __flush_tlb(); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - if (vma->vm_mm == current->active_mm) + /* XEN: cpu_vm_mask is more accurate than active_mm. */ + if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) __flush_tlb_one(addr); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (vma->vm_mm == current->active_mm) + /* XEN: cpu_vm_mask is more accurate than active_mm. */ + if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) __flush_tlb(); } diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h index bc223246d0..5a0da00816 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h @@ -110,8 +110,8 @@ void xen_machphys_update(unsigned long mfn, unsigned long pfn); #include <linux/cpumask.h> void xen_tlb_flush_all(void); void xen_invlpg_all(unsigned long ptr); -void xen_tlb_flush_mask(cpumask_t mask); -void xen_invlpg_mask(cpumask_t mask, unsigned long ptr); +void xen_tlb_flush_mask(cpumask_t *mask); +void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr); #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -- 2.30.2